// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include <algorithm>
#include <set>
#include <unordered_map>
#include <unordered_set>

#include "src/torque/ast.h"
#include "src/torque/earley-parser.h"
#include "src/torque/utils.h"

namespace v8 {
namespace internal {
    namespace torque {

        namespace {

            struct LineAndColumnTracker {
                LineAndColumn previous { 0, 0 };
                LineAndColumn current { 0, 0 };

                void Advance(InputPosition from, InputPosition to)
                {
                    previous = current;
                    while (from != to) {
                        if (*from == '\n') {
                            current.line += 1;
                            current.column = 0;
                        } else {
                            current.column += 1;
                        }
                        ++from;
                    }
                }

                SourcePosition ToSourcePosition()
                {
                    return { CurrentSourceFile::Get(), previous, current };
                }
            };

        } // namespace

        base::Optional<ParseResult> Rule::RunAction(const Item* completed_item,
            const LexerResult& tokens) const
        {
            std::vector<ParseResult> results;
            for (const Item* child : completed_item->Children()) {
                if (!child)
                    continue;
                base::Optional<ParseResult> child_result = child->left()->RunAction(child, tokens);
                if (child_result)
                    results.push_back(std::move(*child_result));
            }
            MatchedInput matched_input = completed_item->GetMatchedInput(tokens);
            CurrentSourcePosition::Scope pos_scope(matched_input.pos);
            ParseResultIterator iterator(std::move(results), matched_input);
            return action_(&iterator);
        }

        Symbol& Symbol::operator=(std::initializer_list<Rule> rules)
        {
            rules_.clear();
            for (const Rule& rule : rules) {
                AddRule(rule);
            }
            return *this;
        }

        std::vector<const Item*> Item::Children() const
        {
            std::vector<const Item*> children;
            for (const Item* current = this; current->prev_; current = current->prev_) {
                children.push_back(current->child_);
            }
            // The above loop collects the child nodes in reversed order.
            std::reverse(children.begin(), children.end());
            DCHECK_EQ(children.size(), right().size());
            return children;
        }

        std::string Item::SplitByChildren(const LexerResult& tokens) const
        {
            if (right().size() == 1) {
                if (const Item* child = Children()[0])
                    return child->SplitByChildren(tokens);
            }
            std::stringstream s;
            bool first = true;
            for (const Item* item : Children()) {
                if (!item)
                    continue;
                if (!first)
                    s << "  ";
                s << item->GetMatchedInput(tokens).ToString();
                first = false;
            }
            return s.str();
        }

        void Item::CheckAmbiguity(const Item& other, const LexerResult& tokens) const
        {
            DCHECK(*this == other);
            if (child_ != other.child_) {
                std::stringstream s;
                s << "Ambiguous grammer rules for \""
                  << child_->GetMatchedInput(tokens).ToString() << "\":\n   "
                  << child_->SplitByChildren(tokens) << "\nvs\n   "
                  << other.child_->SplitByChildren(tokens);
                ReportError(s.str());
            }
            if (prev_ != other.prev_) {
                std::stringstream s;
                s << "Ambiguous grammer rules for \"" << GetMatchedInput(tokens).ToString()
                  << "\":\n   " << SplitByChildren(tokens) << "  ...\nvs\n   "
                  << other.SplitByChildren(tokens) << "  ...";
                ReportError(s.str());
            }
        }

        LexerResult Lexer::RunLexer(const std::string& input)
        {
            LexerResult result;
            InputPosition const begin = input.c_str();
            InputPosition const end = begin + input.size();
            InputPosition pos = begin;
            InputPosition token_start = pos;
            LineAndColumnTracker line_column_tracker;

            match_whitespace_(&pos);
            line_column_tracker.Advance(token_start, pos);
            while (pos != end) {
                token_start = pos;
                Symbol* symbol = MatchToken(&pos, end);
                InputPosition token_end = pos;
                line_column_tracker.Advance(token_start, token_end);
                if (!symbol) {
                    CurrentSourcePosition::Scope pos_scope(
                        line_column_tracker.ToSourcePosition());
                    ReportError("Lexer Error: unknown token " + StringLiteralQuote(std::string(token_start, token_start + std::min<ptrdiff_t>(end - token_start, 10))));
                }
                result.token_symbols.push_back(symbol);
                result.token_contents.push_back(
                    { token_start, pos, line_column_tracker.ToSourcePosition() });
                match_whitespace_(&pos);
                line_column_tracker.Advance(token_end, pos);
            }

            // Add an additional token position to simplify corner cases.
            line_column_tracker.Advance(token_start, pos);
            result.token_contents.push_back(
                { pos, pos, line_column_tracker.ToSourcePosition() });
            return result;
        }

        Symbol* Lexer::MatchToken(InputPosition* pos, InputPosition end)
        {
            InputPosition token_start = *pos;
            Symbol* symbol = nullptr;
            // Find longest matching pattern.
            for (std::pair<const PatternFunction, Symbol>& pair : patterns_) {
                InputPosition token_end = token_start;
                PatternFunction matchPattern = pair.first;
                if (matchPattern(&token_end) && token_end > *pos) {
                    *pos = token_end;
                    symbol = &pair.second;
                }
            }
            // Check if matched pattern coincides with a keyword. Prefer the keyword in
            // this case.
            if (*pos != token_start) {
                auto found_keyword = keywords_.find(std::string(token_start, *pos));
                if (found_keyword != keywords_.end()) {
                    return &found_keyword->second;
                }
                return symbol;
            }
            // Now check for a keyword (that doesn't overlap with a pattern).
            // Iterate from the end to ensure that if one keyword is a prefix of another,
            // we first try to match the longer one.
            for (auto it = keywords_.rbegin(); it != keywords_.rend(); ++it) {
                const std::string& keyword = it->first;
                if (static_cast<size_t>(end - *pos) < keyword.size())
                    continue;
                if (keyword == std::string(*pos, *pos + keyword.size())) {
                    *pos += keyword.size();
                    return &it->second;
                }
            }
            return nullptr;
        }

        // This is an implementation of Earley's parsing algorithm
        // (https://en.wikipedia.org/wiki/Earley_parser).
        const Item* RunEarleyAlgorithm(
            Symbol* start, const LexerResult& tokens,
            std::unordered_set<Item, base::hash<Item>>* processed)
        {
            // Worklist for items at the current position.
            std::vector<Item> worklist;
            // Worklist for items at the next position.
            std::vector<Item> future_items;
            CurrentSourcePosition::Scope source_position(
                SourcePosition { CurrentSourceFile::Get(), { 0, 0 }, { 0, 0 } });
            std::vector<const Item*> completed_items;
            std::unordered_map<std::pair<size_t, Symbol*>, std::set<const Item*>,
                base::hash<std::pair<size_t, Symbol*>>>
                waiting;

            std::vector<const Item*> debug_trace;

            // Start with one top_level symbol mapping to the start symbol of the grammar.
            // This simplifies things because the start symbol might have several
            // rules.
            Symbol top_level;
            top_level.AddRule(Rule({ start }));
            worklist.push_back(Item { top_level.rule(0), 0, 0, 0 });

            size_t input_length = tokens.token_symbols.size();

            for (size_t pos = 0; pos <= input_length; ++pos) {
                while (!worklist.empty()) {
                    auto insert_result = processed->insert(worklist.back());
                    const Item& item = *insert_result.first;
                    DCHECK_EQ(pos, item.pos());
                    MatchedInput last_token = tokens.token_contents[pos];
                    CurrentSourcePosition::Get() = last_token.pos;
                    bool is_new = insert_result.second;
                    if (!is_new)
                        item.CheckAmbiguity(worklist.back(), tokens);
                    worklist.pop_back();
                    if (!is_new)
                        continue;

                    debug_trace.push_back(&item);
                    if (item.IsComplete()) {
                        // 'Complete' phase: Advance all items that were waiting to match this
                        // symbol next.
                        for (const Item* parent : waiting[{ item.start(), item.left() }]) {
                            worklist.push_back(parent->Advance(pos, &item));
                        }
                    } else {
                        Symbol* next = item.NextSymbol();
                        // 'Scan' phase: Check if {next} is the next symbol in the input (this
                        // is never the case if {next} is a non-terminal).
                        if (pos < tokens.token_symbols.size() && tokens.token_symbols[pos] == next) {
                            future_items.push_back(item.Advance(pos + 1, nullptr));
                        }
                        // 'Predict' phase: Add items for every rule of the non-terminal.
                        if (!next->IsTerminal()) {
                            // Remember that this item is waiting for completion with {next}.
                            waiting[{ pos, next }].insert(&item);
                        }
                        for (size_t i = 0; i < next->rule_number(); ++i) {
                            Rule* rule = next->rule(i);
                            auto already_completed = processed->find(Item { rule, rule->right().size(), pos, pos });
                            // As discussed in section 3 of
                            //    Aycock, John, and R. Nigel Horspool. "Practical earley
                            //    parsing." The Computer Journal 45.6 (2002): 620-630.
                            // Earley parsing has the following problem with epsilon rules:
                            // When we complete an item that started at the current position
                            // (that is, it matched zero tokens), we might not yet have
                            // predicted all items it can complete with. Thus we check for the
                            // existence of such items here and complete them immediately.
                            if (already_completed != processed->end()) {
                                worklist.push_back(item.Advance(pos, &*already_completed));
                            } else {
                                worklist.push_back(Item { rule, 0, pos, pos });
                            }
                        }
                    }
                }
                std::swap(worklist, future_items);
            }

            auto final_item = processed->find(Item { top_level.rule(0), 1, 0, input_length });
            if (final_item != processed->end()) {
                // Success: The {top_level} rule matches the complete input.
                return final_item->Children()[0];
            }
            std::string reason;
            const Item& last_item = *debug_trace.back();
            if (last_item.pos() < tokens.token_symbols.size()) {
                std::string next_token = tokens.token_contents[last_item.pos()].ToString();
                reason = "unexpected token \"" + next_token + "\"";
            } else {
                reason = "unexpected end of input";
            }
            ReportError("Parser Error: " + reason);
        }

        // static
        bool Grammar::MatchChar(int (*char_class)(int), InputPosition* pos)
        {
            if (**pos && char_class(static_cast<unsigned char>(**pos))) {
                ++*pos;
                return true;
            }
            return false;
        }

        // static
        bool Grammar::MatchChar(bool (*char_class)(char), InputPosition* pos)
        {
            if (**pos && char_class(**pos)) {
                ++*pos;
                return true;
            }
            return false;
        }

        // static
        bool Grammar::MatchString(const char* s, InputPosition* pos)
        {
            InputPosition current = *pos;
            for (; *s != 0; ++s, ++current) {
                if (*s != *current)
                    return false;
            }
            *pos = current;
            return true;
        }

        // static
        bool Grammar::MatchAnyChar(InputPosition* pos)
        {
            return MatchChar([](char c) { return true; }, pos);
        }

    } // namespace torque
} // namespace internal
} // namespace v8
